QC
2D UMAP
# manually set colors
cluster_colors <- c("#B5B9BA", # Pericytes and SMCs
"#3385BB", # BECs
"#40BBFF", # LECs
"#a5d5a9", # B cells
"#5dbd64", # Plasma cells
"#1c7e24", # T and NK cells
"#F57C7C", # ILCs
"#E42622", # Dendritic cells
"#FBB268", # Neutrophils
"#FE8D19", # Macrophages
"#DE9E83", # Myeloid precursors
"#A6CEE3", # Mast cells
"#9D7BBA", # Fibroblasts
"#977899") # Schwann cells
# plot umap
umap <- DimPlot(object = mouse.annotated,
reduction = "umap",
repel = TRUE,
group.by = "annotated_clusters",
cols = cluster_colors)
umap

3D UMAP
embeddings <- mouse.annotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(mouse.annotated$annotated_clusters))
colnames(embeddings)[4] <- "annotated_clusters"
embeddings <- as.data.frame(embeddings)
three.dim <- plot_ly(embeddings,
x = ~umap_1,
y = ~umap_2,
z = ~umap_3,
color = ~annotated_clusters,
colors = cluster_colors,
size = 1)
three.dim <- three.dim %>% add_markers()
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = 'UMAP_1'),
yaxis = list(title = 'UMAP_2'),
zaxis = list(title = 'UMAP_3')))
three.dim
Cluster markers
goi <- c("Pdgfrb","Acta2","Vwf","Cldn5","Pecam1","Flt4","Prox1","Lyve1","Ptprc",
"Cd19","Ms4a1","Ighd","Igha","Sdc1","Cd3e","Trbc2","Il7r","Nkg7","Klrb1b",
"Klrb1c","Gata3","Rora","Itgax","H2-Eb1","Ccr2","Ly6c2","Lyz2","Ly6g","Itgam",
"Mrc1","Csf1r","Cd38","Mki67","Mcpt4","Ms4a2","Col1a2","Plp1")
v <- VlnPlot(mouse.annotated,
features = goi,
split.by = "annotated_clusters",
flip = TRUE,
stack = TRUE,
cols = cluster_colors)
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
v

path <- paste0(out, "markers/")
pdf(paste0(path, "sandro_markers_violin_annotated.pdf"), width = 10, height = 8)
v
dev.off()
## png
## 2
Split UMAP
# Apoe isoform
umap1 <- DimPlot(object = mouse.annotated,
group.by = "annotated_clusters",
reduction = "umap",
split.by = "isoform",
repel = TRUE,
cols = cluster_colors)
umap1

# sex
umap2 <- DimPlot(object = mouse.annotated,
group.by = "annotated_clusters",
reduction = "umap",
split.by = "sex",
repel = TRUE,
cols = cluster_colors)
umap2

# sample
umap3 <- DimPlot(object = mouse.annotated,
group.by = "annotated_clusters",
reduction = "umap",
split.by = "sample",
ncol = 2,
repel = TRUE,
cols = cluster_colors)
umap3

# phase
umap4 <- DimPlot(object = mouse.annotated,
group.by = "annotated_clusters",
reduction = "umap",
split.by = "phase",
cols = cluster_colors,
repel = TRUE)
umap4

# mito.factor
umap5 <- DimPlot(object = mouse.annotated,
group.by = "annotated_clusters",
reduction = "umap",
split.by = "mito.factor",
cols = cluster_colors,
ncol = 2,
repel = TRUE)
umap5

# sample2
umap6 <- DimPlot(object = mouse.annotated,
group.by = "annotated_clusters",
reduction = "umap",
split.by = "sample2",
cols = cluster_colors,
ncol = 2,
repel = TRUE)
umap6

Heatmap UMAP
# UMAP percent.mt
f1 <- FeaturePlot(mouse.annotated,
reduction = "umap",
features = "percent.mt") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f1

# UMAP nCount
f2 <- FeaturePlot(mouse.annotated,
reduction = "umap",
features = "nCount_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f2

# UMAP nFeature
f3 <- FeaturePlot(mouse.annotated,
reduction = "umap",
features = "nFeature_RNA") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f3

# UMAP percent.ribo
f4 <- FeaturePlot(mouse.annotated,
reduction = "umap",
features = "percent.ribo.protein") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f4

# UMAP cell.complexity
f5 <- FeaturePlot(mouse.annotated,
reduction = "umap",
features = "cell.complexity") +
scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f5

Percent cells per cluster
# isoform
b1 <- mouse.annotated@meta.data %>%
group_by(annotated_clusters, isoform) %>%
dplyr::count() %>%
group_by(annotated_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=annotated_clusters,y=percent, fill=isoform)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = isoform_colors) +
ggtitle("Percentage of isoform per cluster") +
theme(axis.text.x = element_text(angle = 90,vjust = 0.5, hjust=1))
b1

# sex
b2 <- mouse.annotated@meta.data %>%
group_by(annotated_clusters, sex) %>%
dplyr::count() %>%
group_by(annotated_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=annotated_clusters,y=percent, fill=sex)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = sex_colors) +
ggtitle("Percentage of sex per cluster") +
theme(axis.text.x = element_text(angle = 90,vjust = 0.5, hjust=1))
b2

# sample
b3 <- mouse.annotated@meta.data %>%
group_by(annotated_clusters, sample) %>%
dplyr::count() %>%
group_by(annotated_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=annotated_clusters,y=percent, fill=sample)) +
theme_classic() +
geom_col() +
scale_fill_manual(values = sample_colors) +
ggtitle("Percentage of sample per cluster") +
theme(axis.text.x = element_text(angle = 90,vjust = 0.5, hjust=1))
b3

# phase
b4 <- mouse.annotated@meta.data %>%
group_by(annotated_clusters, phase) %>%
dplyr::count() %>%
group_by(annotated_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=annotated_clusters,y=percent, fill=phase)) +
theme_classic() +
geom_col() +
ggtitle("Percentage of phase per cluster") +
theme(axis.text.x = element_text(angle = 90,vjust = 0.5, hjust=1))
b4

# mito.factor
b5 <- mouse.annotated@meta.data %>%
group_by(annotated_clusters, mito.factor) %>%
dplyr::count() %>%
group_by(annotated_clusters) %>%
dplyr::mutate(percent = 100*n/sum(n)) %>%
ungroup() %>%
ggplot(aes(x=annotated_clusters,y=percent, fill=mito.factor)) +
theme_classic() +
geom_col() +
ggtitle("Percentage of mito.factor per cluster") +
theme(axis.text.x = element_text(angle = 90,vjust = 0.5, hjust=1))
b5

Cluster tree
# build tree
Idents(mouse.annotated) <- mouse.annotated$annotated_clusters
mouse.annotated <- BuildClusterTree(object = mouse.annotated,
dims = 1:15,
reorder = FALSE,
reorder.numeric = FALSE)
tree <- mouse.annotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)
# plot tree
p <- ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = cluster_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
p

Cells per sample
# extract data
data <- as.data.frame(table(mouse.annotated$sample))
colnames(data) <- c("sample","frequency")
# plot
ncells <- ggplot(data, aes(x = sample, y = frequency, fill = sample)) +
geom_col() +
theme_classic() +
geom_text(aes(label = frequency),
position=position_dodge(width=0.9),
vjust=-0.25) +
scale_fill_manual(values = sample_colors) +
scale_y_continuous(breaks = seq(0,8000, by = 1000), limits = c(0,8000)) +
ggtitle("Filtered: cells per sample") +
theme(legend.position = "none") +
theme(axis.text.x = element_text(angle = 45, hjust=1))
ncells

Cells per cluster
# extract data
data <- as.data.frame(table(mouse.annotated$annotated_clusters))
colnames(data) <- c("cluster","frequency")
# plot
ncells <- ggplot(data, aes(x = cluster, y = frequency, fill = cluster)) +
geom_col() +
theme_classic() +
geom_text(aes(label = frequency),
position=position_dodge(width=0.9),
vjust=-0.25) +
scale_fill_manual(values = cluster_colors) +
scale_y_continuous(breaks = seq(0,5000, by = 1000), limits = c(0,5000)) +
ggtitle("Cells per Cluster") +
theme(legend.position = "none") +
theme(axis.text.x = element_text(angle = 45, hjust=1))
ncells

Differential expression
E4 female vs E3 female within each cluster
# initialize variables
cell_types <- levels(mouse.annotated$annotated_clusters)
female.df <- data.frame()
# loop through clusters
for (i in cell_types) {
print(i)
cluster <- subset(mouse.annotated, annotated_clusters == i)
Idents(cluster) <- cluster$sample
markers <- FindMarkers(object = cluster,
ident.1 = "E4.F",
ident.2 = "E3.F",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
if(nrow(markers) == 0) {
next
}
markers$cluster <- i
markers$gene <- rownames(markers)
female.df <- rbind(female.df, markers)
}
# reformat table
colnames(female.df)[c(3,4)] <- c("percent_E4_F","percent_E3_F")
rownames(female.df) <- 1:nrow(female.df)
female.df$percent_difference <- abs(female.df$percent_E4_F - female.df$percent_E3_F)
female.df <- female.df[,c(6,7,1,5,2,3,4,8)]
# write table
write.table(female.df,
file = paste0(out, "DEGs/DEG_tables/E4_F_vs_E3_F_DEGs.tsv"),
sep = "\t", quote = FALSE, row.names = FALSE)
# print cell types that don't have DEGs
levels(mouse.annotated$annotated_clusters)[!levels(mouse.annotated$annotated_clusters) %in% unique(female.df$cluster)]
E4 male vs E3 male within each cluster
# initialize variable
cell_types <- levels(mouse.annotated$annotated_clusters)
male.df <- data.frame()
# loop through clusters
for (i in cell_types) {
print(i)
cluster <- subset(mouse.annotated, annotated_clusters == i)
Idents(cluster) <- cluster$sample
markers <- FindMarkers(object = cluster,
ident.1 = "E4.M",
ident.2 = "E3.M",
only.pos = FALSE, # default
min.pct = 0.10, # default
test.use = "MAST",
verbose = TRUE,
assay = "RNA")
if(nrow(markers) == 0) {
next
}
markers$cluster <- i
markers$gene <- rownames(markers)
male.df <- rbind(male.df, markers)
}
# reformat table
colnames(male.df)[c(3,4)] <- c("percent_E4_M","percent_E3_M")
rownames(male.df) <- 1:nrow(male.df)
male.df$percent_difference <- abs(male.df$percent_E4_M - male.df$percent_E3_M)
male.df <- male.df[,c(6,7,1,5,2,3,4,8)]
# write table
write.table(male.df,
file = paste0(out, "DEGs/DEG_tables/E4_M_vs_E3_M_DEGs.tsv"),
sep = "\t",quote = FALSE, row.names = FALSE)
# print cell types that don't have DEGs
levels(mouse.annotated$annotated_clusters)[!levels(mouse.annotated$annotated_clusters) %in% unique(male.df$cluster)]
Compare DEGs
# read tables
female.df <- read.table(paste0(out, "DEGs/DEG_tables/E4_F_vs_E3_F_DEGs.tsv"),
sep = "\t",
header = TRUE)
male.df <- read.table(paste0(out, "DEGs/DEG_tables/E4_M_vs_E3_M_DEGs.tsv"),
sep = "\t",
header = TRUE)
# filter
female.df <- female.df[female.df$p_val_adj < 0.05,]
male.df <- male.df[male.df$p_val_adj < 0.05,]
# add columns
direction <- female.df$avg_log2FC > 0
direction <- gsub(TRUE, "E4_female_up", direction)
direction <- gsub(FALSE, "E4_female_down", direction)
female.df$direction <- direction
direction <- male.df$avg_log2FC > 0
direction <- gsub(TRUE, "E4_male_up", direction)
direction <- gsub(FALSE, "E4_male_down", direction)
male.df$direction <- direction
# reformat tables
female.df2 <- female.df %>%
dplyr::count(cluster,direction) %>%
tidyr::spread(cluster, n)
male.df2 <- male.df %>%
dplyr::count(cluster,direction) %>%
tidyr::spread(cluster, n)
# master table
df <- smartbind(female.df2, male.df2)
df[is.na(df)] <- 0
df
# cell types with 0 DEGs in every comparison are not shown
# add column with all 0s
cell_types <- c(levels(mouse.annotated$annotated_clusters), "direction")
for (i in cell_types) {
if (!i %in% colnames(df)) {
print(i)
df[[i]] <- 0
}
}
# save
write.table(df,
file = paste0(out, "DEGs/DEG_tables/DEG_comparison_pvaladj_0.05.tsv"),
sep = "\t",
quote = FALSE,
row.names = FALSE)
DEG heatmap
# set heatmap colors and names
paletteLength <- 100
myColor <- colorRampPalette(c("white","#f0eb9e","darkgreen"))(paletteLength)
# reformat table
df2 <- df
rownames(df2) <- df2$direction
df2$direction <- NULL
# save
path <- paste0(out, "DEGs/DEG_tables/DEG_comparison_heatmap_pvaladj_0.05.pdf")
pdf(path, width = 10, height = 6)
# plot
pheatmap::pheatmap(df2,
main = "FDRq < 0.05, |LFC| > 0",
treeheight_row = 0,
treeheight_col = 0,
color = myColor,
cluster_rows = FALSE,
display_numbers = round(df2, digits = 0),
fontsize_number = 12,
number_color = "black")
Volcano
variables <- c("E4_F_vs_E3_F","E4_M_vs_E3_M")
all_clusters <- levels(mouse.annotated$annotated_clusters)
for (i in variables) {
# read DEG file
if (i == "E4_F_vs_E3_F") {
treatment_vs_control <-
read.delim(paste0(out, "DEGs/DEG_tables/E4_F_vs_E3_F_DEGs.tsv"), sep = "\t")
} else {
treatment_vs_control <-
read.delim(paste0(out, "DEGs/DEG_tables/E4_M_vs_E3_M_DEGs.tsv"), sep = "\t")
}
# assign colors
color_values <- vector()
max <- nrow(treatment_vs_control)
for(row in 1:max){
if (treatment_vs_control$p_val_adj[row] < 0.05){
if (treatment_vs_control$avg_log2FC [row] > 0){
color_values <- c(color_values, 1) # 1 when logFC > 0 and FDRq < 0.05
}
else if (treatment_vs_control$avg_log2FC[row] < 0){
color_values <- c(color_values, 2) # 2 when logFC < 0 and FDRq < 0.05
}
}
else{
color_values <- c(color_values, 3) # 3 when FDRq >= 0.05
}
}
treatment_vs_control$color_adjpval_0.05 <- factor(color_values)
# loop through clusters
for (j in all_clusters) {
# subset cluster
data <- subset(treatment_vs_control, cluster == j)
# plot only if there are DEGs with p_val_adj < 0.05
num <- subset(data, p_val_adj < 0.05)
num <- nrow(num)
if(num != 0) {
# subset genes to label
up <- data[data$color_adjpval_0.05 == 1,]
up10 <- up[1:10,]
down <- data[data$color_adjpval_0.05 == 2,]
down10 <- down[1:10,]
# set manual colors
if (!1 %in% unique(data$color_adjpval_0.05)) {
my_colors <- c("blue","gray")
} else if (!2 %in% unique(data$color_adjpval_0.05)) {
my_colors <- c("red","gray")
} else if (!1 %in% unique(data$color_adjpval_0.05) && !2 %in% unique(data$color_adjpval_0.05)) {
my_colors <- c("gray")
} else {
my_colors <- c("red","blue","gray")
}
# set significance threshold
hadjpval <- (-log10(max(
data$p_val[data$p_val_adj < 0.05],
na.rm=TRUE)))
# plot
p <-
ggplot(data = data,
aes(x = avg_log2FC, # x-axis is logFC
y = -log10(p_val), # y-axis will be -log10 of P.Value
color = color_adjpval_0.05)) + # color is based on factored color column
geom_point(alpha = 0.8, size = 2) + # create scatterplot, alpha makes points transparent
theme_bw() + # set color theme
theme(legend.position = "none") + # no legend
scale_color_manual(values = my_colors) + # set factor colors
labs(
title = "", # no main title
x = expression(log[2](FC)), # x-axis title
y = expression(-log[10] ~ "(" ~ italic("p") ~ "-value)") # y-axis title
) +
theme(axis.title.x = element_text(size = 10),
axis.text.x = element_text(size = 10)) +
theme(axis.title.y = element_text(size = 10),
axis.text.y = element_text(size = 10)) +
geom_hline(yintercept = hadjpval, # horizontal line
colour = "#000000",
linetype = "dashed") +
ggtitle(paste0(j,"\n",i,", p_val_adj < 0.05")) +
geom_text_repel(data = up10,
aes(x = avg_log2FC, y= -log10(p_val), label = gene),
color = "maroon",
fontface="italic",
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
) +
geom_text_repel(data = down10,
aes(x = avg_log2FC, y= -log10(p_val), label = gene),
color = "navyblue",
fontface="italic",
max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
)
p
# save
clus <- j
clus <- gsub(" ","_",clus)
clus <- gsub("/","_",clus)
clus <- gsub("-","_",clus)
if (i == "E4_F_vs_E3_F") {
path <- paste0(out, "DEGs/volcano/E4_F_vs_E3_F/", tolower(clus),"_female_volcano")
} else {
path <- paste0(out, "DEGs/volcano/E4_M_vs_E3_M/", tolower(clus),"_male_volcano")
}
pdf(paste(path,".pdf"), height = 8, width = 8)
print(p)
dev.off()
print(paste("i =",i,", j =",j))
}
} # end loop through clusters
} # end loop through variables
Upset plot
# read tables
female.df <- read.table(paste0(out, "DEGs/DEG_tables/E4_F_vs_E3_F_DEGs.tsv"),
sep = "\t",
header = TRUE)
male.df <- read.table(paste0(out, "DEGs/DEG_tables/E4_M_vs_E3_M_DEGs.tsv"),
sep = "\t", header = TRUE)
# filter tables
female.df <- female.df[female.df$p_val_adj < 0.05,]
male.df <- male.df[male.df$p_val_adj < 0.05,]
# get cell types and remove any with 0 DEGs in every comparison
clusters <- levels(mouse.annotated$annotated_clusters)
clusters <- clusters[!clusters %in% "LECs"]
for (i in clusters) {
# Subset df by cluster
print(i)
female <- subset(female.df, female.df$cluster == i)
sex <- subset(male.df, male.df$cluster == i)
# Subset lists
female_up <- subset(female$gene, female$avg_log2FC > 0)
female_down <- subset(female$gene, female$avg_log2FC < 0)
male_up <- subset(sex$gene, sex$avg_log2FC > 0)
male_down <- subset(sex$gene, sex$avg_log2FC < 0)
list_input <- list("E4 Female Up-regulated" = female_up,
"E4 Male Up-regulated" = male_up,
"E4 Female Down-regulated" = female_down,
"E4 Male Down-regulated" = male_down)
data <- fromList(list_input)
# store names
names <- c("E4 Male Down-regulated","E4 Female Down-regulated",
"E4 Male Up-regulated","E4 Female Up-regulated")
# plot
upset_gene <- ComplexUpset::upset(data,
names,
set_sizes=(
upset_set_size()
+ geom_text(aes(label=..count..), hjust=1.1, stat='count')
+ expand_limits(y=1100)),
queries = list(upset_query("E4 Female Up-regulated", fill = "red"),
upset_query("E4 Male Up-regulated", fill = "red"),
upset_query("E4 Female Down-regulated", fill = "blue"),
upset_query("E4 Male Down-regulated", fill = "blue")),
base_annotations = list('Intersection size' = (
intersection_size(bar_number_threshold=1, width=0.5)
+ scale_y_continuous(expand=expansion(mult=c(0, 0.05)),limits = c(0,1000)) # space on top
+ theme(
# hide grid lines
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
# show axis lines
axis.line=element_line(colour='black')))),
stripes = upset_stripes(
geom=geom_segment(size=12), # make the stripes larger
colors=c('grey95', 'white')),
# to prevent connectors from getting the colorured
# use `fill` instead of `color`, together with `shape='circle filled'`
matrix = intersection_matrix(
geom=geom_point(
shape='circle filled',
size=3,
stroke=0.45)),
sort_sets=FALSE,
sort_intersections='descending'
)
upset_gene <- upset_gene + ggtitle(paste0(i,", adj_p_val < 0.05"))
i <- gsub(" ","_",i)
pdf(paste0(out, "DEGs/upset/",tolower(i),"_upset.pdf"), height = 6, width = 8)
print(upset_gene)
dev.off()
}